# Work from the directory that holds the MultiQC exports; the relative file
# names used below resolve against it.
setwd("~/Documents/MiGASti/Databases")

# Location of the MultiQC general-stats table.
multi_qc_data <- "~/Documents/MiGASti/Databases/multiqc_general_stats_533s.txt"

# Read via `multi_qc_data` so the path is spelled out in exactly one place.
# NOTE(review): unlike the two Picard tables below, no `sep` is given here, so
# fields are split on any whitespace -- confirm that is intended.
general_stats <- read.table(
  multi_qc_data,
  header = TRUE,
  stringsAsFactors = FALSE
)

# Picard coverage table: the first column becomes the row names, and
# check.names = FALSE keeps the column headers exactly as written in the file.
multiqc_picard_coverage <- read.table(
  "mqc_picard_rna_coverage_1.txt",
  header = TRUE, stringsAsFactors = FALSE, check.names = FALSE,
  sep = "\t", row.names = 1
)

# Picard RnaSeqMetrics assignment table (tab separated, headers kept verbatim).
picard_rnaseqmetrics <- read.table(
  "mqc_picard_rnaseqmetrics_assignment_plot_1.txt",
  header = TRUE, stringsAsFactors = FALSE, check.names = FALSE, sep = "\t"
)

# Mean of the uniquely mapped read column, ignoring missing values.
mean_aligned <- mean(
  general_stats$STAR_mqc.generalstats.star.uniquely_mapped,
  na.rm = TRUE
)
mean_aligned
[1] 27823519
# Median of the uniquely mapped read column (missing values dropped).
median_aligned <- median(
  general_stats[["STAR_mqc.generalstats.star.uniquely_mapped"]],
  na.rm = TRUE
)
median_aligned
[1] 27826644
# Uniquely mapped reads multiplied by (1 - Picard PERCENT_DUPLICATION).
# NOTE(review): despite the variable name, this is the count left after
# discounting duplicates, and it presumes PERCENT_DUPLICATION is stored as a
# 0-1 fraction -- confirm both.
mapped_duplicates <- with(
  general_stats,
  STAR_mqc.generalstats.star.uniquely_mapped *
    (1 - Picard_mqc.generalstats.picard.PERCENT_DUPLICATION)
)

# Histogram of the adjusted counts, followed by their median.
ggplot() +
  geom_histogram(aes(x = mapped_duplicates))
median(mapped_duplicates)
[1] 16976546
# Standard deviation of the duplicate-adjusted read counts computed above.
sd(mapped_duplicates)
[1] 7587867
# Largest uniquely mapped read count, then the sample row(s) that reach it.
max_aligned <- max(
  general_stats$STAR_mqc.generalstats.star.uniquely_mapped,
  na.rm = TRUE
)
# Rows whose count is NA come back as all-NA rows from the comparison;
# na.omit() removes them from the printed result.
na.omit(
  general_stats[
    general_stats$STAR_mqc.generalstats.star.uniquely_mapped == max_aligned,
    c("Sample", "STAR_mqc.generalstats.star.uniquely_mapped")
  ]
)
Sample STAR_mqc.generalstats.star.uniquely_mapped
526 14-046-CC-LPS 127595848
# Smallest uniquely mapped read count, then the sample row(s) that have it.
min_aligned <- min(
  general_stats$STAR_mqc.generalstats.star.uniquely_mapped,
  na.rm = TRUE
)
# Rows whose count is NA come back as all-NA rows from the comparison;
# na.omit() removes them from the printed result.
na.omit(
  general_stats[
    general_stats$STAR_mqc.generalstats.star.uniquely_mapped == min_aligned,
    c("Sample", "STAR_mqc.generalstats.star.uniquely_mapped")
  ]
)
Sample STAR_mqc.generalstats.star.uniquely_mapped
91 18-064-GFM-IFNy 64850
# Distribution of uniquely mapped reads across samples; the dotted red line
# marks the 10 million cut-off applied just below.
# Axis titles are set through labs(); geom_histogram() has no `xlab` argument,
# so none is passed to it.
ggplot(general_stats, aes(x = STAR_mqc.generalstats.star.uniquely_mapped)) +
  geom_histogram(bins = 100, colour = "black", fill = "white") +
  labs(x = "Uniquely mapped reads", y = "Number of samples") +
  geom_vline(
    xintercept = 10000000, linetype = "dotted", color = "red", size = 0.7
  ) +
  scale_y_continuous(breaks = 1:100)

# Samples with fewer than 10 million uniquely mapped reads. which() drops
# rows where the count is NA instead of returning NA sample names.
star_alignment_10m <- general_stats$Sample[
  which(general_stats$STAR_mqc.generalstats.star.uniquely_mapped < 10000000)
]
length(star_alignment_10m)
[1] 35
# Distinct sample names below the 10 million uniquely-mapped-read cut-off.
unique(star_alignment_10m)
[1] “18-021-GFM-DEX” “18-021-GFM-LPS” “18-021-THA-LPS”
[4] “18-064-GFM-ATP” “18-064-GFM-IFNy” “18-064-GFM-unstim” [7] “18-105-GTS-LPS” “18-105-GTS-unstim” “18-118-GFM-IL4”
[10] “MG-15-SN-RNA” “16-082-SVZ-IFNy” “16-082-SVZ-unstim” [13] “16-117-GFM-DEX” “16-117-SVZ-unstim” “17-003-GTS-LPS”
[16] “17-017-GFM-unstim” “17-029-GFM-LPS” “17-029-GTS-unstim” [19] “17-032-SVZ-IFNy” “17-078-GFM-TNFa” “13-087-CC-LPS”
[22] “14-053-CC-IFNy” “14-063-CC-LPS” “14-075-GFM-IFNy”
[25] “14-075-GFM-LPS” “14-075-GFM-TNFa” “14-075-GTS-LPS”
[28] “15-055-GFM-DEX” “15-089-GTS-unstim” “15-089-THA-LPS”
[31] “15-089-THA-unstim” “16-003-SVZ-IFNy” “16-003-SVZ-R848”
[34] “16-003-SVZ-TNFa” “16-003-SVZ-unstim”
# Samples with fewer than 5 million uniquely mapped reads (NA counts are
# skipped by which()), and how many there are.
star_alignment_5m <- with(
  general_stats,
  Sample[which(STAR_mqc.generalstats.star.uniquely_mapped < 5000000)]
)
length(star_alignment_5m)
[1] 21
# Distinct sample names below the 5 million uniquely-mapped-read cut-off.
unique(star_alignment_5m)
[1] “18-021-GFM-DEX” “18-021-GFM-LPS” “18-064-GFM-ATP”
[4] “18-064-GFM-IFNy” “18-064-GFM-unstim” “18-105-GTS-LPS”
[7] “18-105-GTS-unstim” “18-118-GFM-IL4” “MG-15-SN-RNA”
[10] “16-117-SVZ-unstim” “17-003-GTS-LPS” “17-017-GFM-unstim” [13] “17-032-SVZ-IFNy” “13-087-CC-LPS” “14-075-GFM-IFNy”
[16] “14-075-GFM-LPS” “14-075-GFM-TNFa” “15-055-GFM-DEX”
[19] “15-089-GTS-unstim” “15-089-THA-LPS” “15-089-THA-unstim”
# Samples with fewer than 1 million uniquely mapped reads (NA counts are
# skipped by which()), and how many there are.
star_alignment_1m <- with(
  general_stats,
  Sample[which(STAR_mqc.generalstats.star.uniquely_mapped < 1000000)]
)
length(star_alignment_1m)
[1] 10
# Distinct sample names below the 1 million uniquely-mapped-read cut-off.
unique(star_alignment_1m)
[1] “18-064-GFM-IFNy” “18-064-GFM-unstim” “18-105-GTS-LPS”
[4] “MG-15-SN-RNA” “16-117-SVZ-unstim” “17-003-GTS-LPS”
[7] “15-055-GFM-DEX” “15-089-GTS-unstim” “15-089-THA-LPS”
[10] “15-089-THA-unstim”
# Samples whose Picard PCT_RIBOSOMAL_BASES value exceeds 30, shown as a
# one-column data frame.
ribosomal_bases_30 <- with(
  general_stats,
  Sample[which(Picard_mqc.generalstats.picard.PCT_RIBOSOMAL_BASES > 30)]
)
as.data.frame(ribosomal_bases_30)
ribosomal_bases_30 1 MG-08-CER-RNA 2 14-046-GFM-TNFa 3 14-046-GTS-unstim
# Samples whose Picard PCT_RIBOSOMAL_BASES value exceeds 20, shown as a
# one-column data frame.
ribosomal_bases_20 <- with(
  general_stats,
  Sample[which(Picard_mqc.generalstats.picard.PCT_RIBOSOMAL_BASES > 20)]
)
as.data.frame(ribosomal_bases_20)
ribosomal_bases_20 1 18-079-GFM-LPS 2 MG-08-CER-RNA 3 MG-13-CER-RNA 4 MG-16-CER-RNA 5 MG-24-HIP-RNA 6 MG-24-MGF-RNA 7 14-046-GFM-IL4 8 14-046-GFM-LPS 9 14-046-GFM-TNFa 10 14-046-GTS-unstim 11 14-046-GFM-DEX
# Distribution of the ribosomal-base percentage across samples; the dotted red
# line marks the value 20 used for ribosomal_bases_20.
# Axis titles are set through labs(); geom_histogram() has no `xlab` argument,
# so none is passed to it.
ggplot(
  general_stats,
  aes(x = Picard_mqc.generalstats.picard.PCT_RIBOSOMAL_BASES)
) +
  geom_histogram(bins = 100, colour = "black", fill = "white") +
  labs(x = "Percentage of rRNA", y = "Number of samples") +
  geom_vline(xintercept = 20, linetype = "dotted", color = "red", size = 0.7) +
  scale_y_continuous(breaks = 1:100)

# Samples whose Picard PCT_MRNA_BASES value is below 10 (NA values are skipped
# by which()), shown as a one-column data frame.
mrna_mapping_10 <- general_stats$Sample[
  which(general_stats$Picard_mqc.generalstats.picard.PCT_MRNA_BASES < 10)
]
as.data.frame(mrna_mapping_10)
mrna_mapping_10
1 17-099-GTS-LPS 2 17-099-THA-IFNy 3 17-148-GTS-unstim 4 18-021-GTS-unstim 5 18-021-THA-LPS 6 16-110-GFM-IFNy 7 16-110-GFM-TNFa 8 16-110-THA-LPS 9 16-112-GFM-TNFa 10 16-112-GTS-IFNy 11 16-112-GTS-LPS 12 16-112-GTS-unstim 13 16-112-THA-LPS 14 16-112-THA-unstim 15 16-117-GFM-DEX 16 16-117-GFM-LPS 17 16-117-GFM-R848 18 16-118-GFM-LPS 19 16-118-GFM-TNFa 20 16-118-GTS-unstim 21 17-003-GTS-unstim 22 17-017-GTS-unstim 23 17-017-THA-LPS 24 17-029-GFM-LPS 25 17-043-GFM-IFNy 26 17-043-GFM-LPS 27 17-043-GFM-R848 28 17-043-GFM-TNFa 29 17-043-GFM-unstim 30 17-043-GTS-unstim 31 17-043-THA-LPS 32 17-078-GFM-LPS 33 17-078-GFM-R848 34 17-078-GFM-TNFa 35 17-078-GFM-unstim 36 13-056-GFM-IFNy 37 13-056-GFM-LPS 38 13-074-CC-LPS 39 13-095-CC-unstim 40 13-095-GFM-unstim 41 14-005-CC-unstim 42 14-005-GFM-TNFa 43 14-005-GFM-unstim 44 14-005-GTS-LPS 45 14-005-GTS-unstim 46 14-010-CC-LPS 47 14-015-GFM-unstim 48 14-015-GTS-LPS 49 14-015-GTS-unstim 50 14-046-GFM-unstim 51 14-051-GFM-LPS 52 14-053-CC-unstim 53 14-055-CC-IFNy 54 14-055-CC-LPS 55 14-055-CC-unstim 56 14-069-GTS-IFNy 57 14-069-GTS-LPS 58 14-069-GTS-unstim 59 14-074-CC-LPS 60 14-074-GFM-LPS 61 14-074-GFM-unstim 62 14-075-GFM-unstim 63 14-078-GFM-DEX 64 14-078-GFM-LPS 65 14-078-GFM-unstim 66 15-018-GFM-ATP 67 15-018-GFM-DEX 68 15-018-GFM-IL4 69 15-018-GFM-unstim 70 15-021-THA-unstim 71 15-024-THA-unstim 72 15-055-GFM-unstim 73 16-003-GFS-IFNy 74 16-003-GFS-LPS 75 16-003-SVZ-IFNy 76 16-003-SVZ-LPS 77 16-003-SVZ-R848 78 16-003-SVZ-TNFa 79 16-028-GTS-LPS 80 16-028-GTS-unstim 81 16-049-GTS-LPS 82 16-049-GTS-unstim 83 13-074-CC-IFNy 84 14-005-GFM-IFNy 85 14-005-GFM-R848 86 14-063-GTS-unstim 87 14-078-GFM-IL4 88 15-024-GFM-unstim 89 16-117-GFM-TNFa 90 17-003-THA-IFNy
# Samples whose Picard PCT_MRNA_BASES value is below 5 (NA values are skipped
# by which()), shown as a one-column data frame.
mrna_mapping_5 <- with(
  general_stats,
  Sample[which(Picard_mqc.generalstats.picard.PCT_MRNA_BASES < 5)]
)
as.data.frame(mrna_mapping_5)
mrna_mapping_5
1 17-148-GTS-unstim 2 13-095-CC-unstim 3 14-005-GTS-LPS 4 14-046-GFM-unstim 5 14-069-GTS-LPS 6 14-074-GFM-LPS 7 14-078-GFM-DEX 8 14-078-GFM-LPS 9 15-018-GFM-ATP 10 15-021-THA-unstim 11 14-063-GTS-unstim
# Distribution of the mRNA-base percentage across samples, drawn twice: once
# with the dotted red line at 5 and once at 10 (the two cut-offs used for
# mrna_mapping_5 and mrna_mapping_10).
# The x axis is labelled as mRNA because the plotted column is PCT_MRNA_BASES.
# Axis titles are set through labs(); geom_histogram() has no `xlab` argument,
# so none is passed to it.
ggplot(general_stats, aes(x = Picard_mqc.generalstats.picard.PCT_MRNA_BASES)) +
  geom_histogram(bins = 100, colour = "black", fill = "white") +
  labs(x = "Percentage of mRNA", y = "Number of samples") +
  geom_vline(xintercept = 5, linetype = "dotted", color = "red", size = 0.7) +
  scale_y_continuous(breaks = 1:100)

ggplot(general_stats, aes(x = Picard_mqc.generalstats.picard.PCT_MRNA_BASES)) +
  geom_histogram(bins = 100, colour = "black", fill = "white") +
  labs(x = "Percentage of mRNA", y = "Number of samples") +
  geom_vline(xintercept = 10, linetype = "dotted", color = "red", size = 0.7) +
  scale_y_continuous(breaks = 1:100)

# Check the data frame: peek at the first 10 rows and 6 columns.
multiqc_picard_coverage[1:10, 1:6]
0 1 2 3 4 5
13-051-NA-IFNy 0.300965 0.342670 0.395262 0.450857 0.500259 0.548744 13-051-NA-LPS 0.288671 0.327072 0.373270 0.424037 0.473820 0.527181 13-051-NA-unstim 0.294282 0.337845 0.399570 0.458347 0.512271 0.568627 13-054-CC-IFNy 0.242727 0.278563 0.327355 0.371304 0.418472 0.469338 13-054-CC-LPS 0.244685 0.278994 0.330070 0.378772 0.426247 0.477317 13-054-CC-unstim 0.258629 0.293941 0.343500 0.394874 0.444210 0.496089 13-056-GFM-IFNy 0.296730 0.336912 0.390717 0.439447 0.485305 0.533475 13-056-GFM-LPS 0.295048 0.330878 0.378005 0.426640 0.471571 0.516512 13-056-GFM-TNFa 0.304426 0.345266 0.401569 0.456734 0.511181 0.565316 13-056-GFM-unstim 0.294218 0.334060 0.387388 0.437122 0.486448 0.537226
# Create plot: one semi-transparent line per row of the coverage table.
# The table's column headers, converted to numbers, are the x positions.
x <- as.numeric(colnames(multiqc_picard_coverage))
colfunc <- colorRampPalette(c("#4DBBD5FF", "#3C5488FF")) # Blue colors
# Adjust alpha for more or less transparency.
colors <- alpha(colfunc(nrow(multiqc_picard_coverage)), alpha = 0.5)

# The first row opens the plot; the remaining rows are added on top of it.
y <- multiqc_picard_coverage[1, ]
plot(
  x, y,
  type = "l", xlab = "Gene body", ylab = "Read coverage",
  col = colors[1], ylim = c(0, 2)
)
# seq_len(n)[-1] is empty for a one-row table, whereas 2:n would count down
# (2, 1) and index a row that does not exist.
for (i in seq_len(nrow(multiqc_picard_coverage))[-1]) {
  y <- multiqc_picard_coverage[i, ]
  lines(x, y, col = colors[i])
}

# Coverage value in column "95" for every sample, keyed by sample (row) name
# and sorted from highest to lowest.
cov_95 <- data.frame(
  position_95 = multiqc_picard_coverage[["95"]],
  row.names = rownames(multiqc_picard_coverage)
)
cov_95_ordered <- cov_95[
  order(cov_95$position_95, decreasing = TRUE), ,
  drop = FALSE
]
# NOTE(review): data.table() does not keep row names by default, so this
# print shows the values without the sample names.
data.table(cov_95_ordered)
position_95
1: 1.260062 2: 1.123975 3: 1.060071 4: 1.045566 5: 1.040268 —
529: 0.610565 530: 0.609280 531: 0.604104 532: 0.579089 533: 0.569884
# Rebuild the column-"95" ranking: one value per sample, keyed by the row
# names of the coverage table, sorted from highest to lowest.
# NOTE(review): this repeats the computation made right after the coverage
# plot -- confirm whether both copies are needed.
cov_95 <- data.frame(
  position_95 = multiqc_picard_coverage[["95"]],
  row.names = rownames(multiqc_picard_coverage)
)
cov_95_ordered <- cov_95[
  order(cov_95[["position_95"]], decreasing = TRUE), ,
  drop = FALSE
]
data.table(cov_95_ordered)
position_95
1: 1.260062 2: 1.123975 3: 1.060071 4: 1.045566 5: 1.040268 —
529: 0.610565 530: 0.609280 531: 0.604104 532: 0.579089 533: 0.569884
526 position 95 1.26 MG-24-HIP-RNA 494 position 95 1.12 MG-12-CER-RNA 130 position 95 1.06 15-089-GTS-unstim 503 position 95 1.04 MG-15-SVZ-RNA 492 position 95 1.04 MG-08-CER-RNA
# Drop the six samples with the highest value in column "95" and redraw the
# coverage plot without them. head() returns fewer names when the table has
# fewer than six rows, instead of padding the selection with NA.
# NOTE(review): six samples are excluded here while the Degraded_RNA vector
# defined later in the file names five -- confirm the intended count.
exclude <- head(rownames(cov_95_ordered), 6)
multiqc_picard_coverage_filt <- multiqc_picard_coverage[
  !rownames(multiqc_picard_coverage) %in% exclude,
]

x <- as.numeric(colnames(multiqc_picard_coverage_filt))
colfunc <- colorRampPalette(c("#4DBBD5FF", "#3C5488FF")) # Blue colors
# Adjust alpha for more or less transparency.
colors <- alpha(colfunc(nrow(multiqc_picard_coverage_filt)), alpha = 0.5)

# The first row opens the plot; the remaining rows are added on top of it.
y <- multiqc_picard_coverage_filt[1, ]
plot(
  x, y,
  type = "l", xlab = "Gene body", ylab = "Read coverage",
  col = colors[1], ylim = c(0, 2)
)
# seq_len(n)[-1] is empty for a one-row table, whereas 2:n would count down
# (2, 1) and index a row that does not exist.
for (i in seq_len(nrow(multiqc_picard_coverage_filt))[-1]) {
  y <- multiqc_picard_coverage_filt[i, ]
  lines(x, y, col = colors[i])
}

# Share of the "Coding" column in each row's total over all columns but the
# first.
# NOTE(review): assumes the first column is the sample identifier and every
# other column is numeric -- confirm against the input file.
coding_mapping <- picard_rnaseqmetrics[, "Coding"] /
  rowSums(picard_rnaseqmetrics[, -1])
names(coding_mapping) <- picard_rnaseqmetrics$Sample

# Names of the samples whose coding share is below 0.05; which() skips NAs.
filter_coding <- coding_mapping < 0.05
names(filter_coding)[which(filter_coding)]
[1] “13-051-NA-IFNy” “13-051-NA-LPS” “13-056-GFM-IFNy”
[4] “13-056-GFM-LPS” “13-074-CC-IFNy” “13-074-CC-LPS”
[7] “13-077-CC-LPS” “13-095-CC-unstim” “13-095-GFM-unstim” [10] “14-005-CC-unstim” “14-005-GFM-IFNy” “14-005-GFM-R848”
[13] “14-005-GFM-TNFa” “14-005-GFM-unstim” “14-005-GTS-LPS”
[16] “14-005-GTS-unstim” “14-010-CC-LPS” “14-015-GFM-unstim” [19] “14-015-GTS-LPS” “14-015-GTS-unstim” “14-046-GFM-unstim” [22] “14-051-GFM-LPS” “14-053-CC-unstim” “14-055-CC-IFNy”
[25] “14-055-CC-LPS” “14-055-CC-unstim” “14-063-GTS-unstim” [28] “14-069-GTS-IFNy” “14-069-GTS-LPS” “14-069-GTS-unstim” [31] “14-074-CC-LPS” “14-074-CC-unstim” “14-074-GFM-LPS”
[34] “14-074-GFM-unstim” “14-075-GFM-unstim” “14-078-GFM-DEX”
[37] “14-078-GFM-IL4” “14-078-GFM-LPS” “14-078-GFM-unstim” [40] “15-018-GFM-ATP” “15-018-GFM-DEX” “15-018-GFM-IL4”
[43] “15-018-GFM-LPS” “15-018-GFM-unstim” “15-021-THA-unstim” [46] “15-024-GFM-unstim” “15-024-THA-unstim” “15-055-GFM-unstim” [49] “16-003-GFS-IFNy” “16-003-GFS-LPS” “16-003-SVZ-IFNy”
[52] “16-003-SVZ-LPS” “16-003-SVZ-R848” “16-003-SVZ-TNFa”
[55] “16-003-THA-LPS” “16-028-GTS-LPS” “16-028-GTS-unstim” [58] “16-049-GTS-LPS” “16-049-GTS-unstim” “16-110-GFM-IFNy”
[61] “16-110-GFM-TNFa” “16-110-THA-LPS” “16-112-GFM-LPS”
[64] “16-112-GFM-TNFa” “16-112-GTS-IFNy” “16-112-GTS-LPS”
[67] “16-112-GTS-unstim” “16-112-THA-LPS” “16-112-THA-unstim” [70] “16-117-GFM-DEX” “16-117-GFM-LPS” “16-117-GFM-R848”
[73] “16-117-GFM-TNFa” “16-118-GFM-IFNy” “16-118-GFM-LPS”
[76] “16-118-GFM-TNFa” “16-118-GTS-LPS” “16-118-GTS-unstim” [79] “17-003-GTS-unstim” “17-003-THA-IFNy” “17-017-GTS-unstim” [82] “17-017-THA-LPS” “17-029-GFM-LPS” “17-029-GTS-unstim” [85] “17-043-GFM-IFNy” “17-043-GFM-LPS” “17-043-GFM-R848”
[88] “17-043-GFM-TNFa” “17-043-GFM-unstim” “17-043-GTS-unstim” [91] “17-043-THA-LPS” “17-078-GFM-LPS” “17-078-GFM-R848”
[94] “17-078-GFM-TNFa” “17-078-GFM-unstim” “17-094-GTS-LPS”
[97] “17-097-GTS-LPS” “17-099-GTS-LPS” “17-099-THA-IFNy”
[100] “17-124-GFM-unstim” “17-148-GTS-LPS” “17-148-GTS-unstim” [103] “18-0198-GFM-LPS” “18-021-GTS-unstim” “18-021-THA-LPS”
[106] “18-064-GFM-ATP”
# Show the per-sample TRUE/FALSE coding-share flag as a one-column table.
data.table(filter_coding)
filter_coding
1: TRUE 2: TRUE 3: FALSE 4: FALSE 5: FALSE —
529: FALSE 530: FALSE 531: FALSE 532: FALSE 533: FALSE
# Hand-curated sample lists that complement the threshold-based filters.
outlier_samples <- c(
  "MG-16-CER-RNA", "MG-08-CER-RNA", "13-095-GTS-unstim", "MG-15-SVZ-RNA",
  "MG-24-MGF-RNA", "14-046-GTS-unstim", "14-046-GFM-IL4", "16-117-THA-LPS",
  "16-074-GFM-unstim", "16-003-SVZ-TNFa"
)
Degraded_RNA <- c(
  "MG-24-HIP-RNA", "MG-12-CER-RNA", "15-089-GTS-unstim", "MG-15-SVZ-RNA",
  "MG-08-CER-RNA"
)

# Named collection of every exclusion criterion: <1M uniquely mapped reads,
# mRNA bases < 5, ribosomal bases > 20, manual outliers, degraded RNA.
list_filters <- list(
  filter1 = star_alignment_1m,
  filter2 = mrna_mapping_5,
  filter3 = ribosomal_bases_20,
  filter4 = outlier_samples,
  filter5 = Degraded_RNA
)
list_filters
$filter1 [1] “18-064-GFM-IFNy” “18-064-GFM-unstim” “18-105-GTS-LPS”
[4] “MG-15-SN-RNA” “16-117-SVZ-unstim” “17-003-GTS-LPS”
[7] “15-055-GFM-DEX” “15-089-GTS-unstim” “15-089-THA-LPS”
[10] “15-089-THA-unstim”
$filter2 [1] “17-148-GTS-unstim” “13-095-CC-unstim” “14-005-GTS-LPS”
[4] “14-046-GFM-unstim” “14-069-GTS-LPS” “14-074-GFM-LPS”
[7] “14-078-GFM-DEX” “14-078-GFM-LPS” “15-018-GFM-ATP”
[10] “15-021-THA-unstim” “14-063-GTS-unstim”
$filter3 [1] “18-079-GFM-LPS” “MG-08-CER-RNA” “MG-13-CER-RNA”
[4] “MG-16-CER-RNA” “MG-24-HIP-RNA” “MG-24-MGF-RNA”
[7] “14-046-GFM-IL4” “14-046-GFM-LPS” “14-046-GFM-TNFa”
[10] “14-046-GTS-unstim” “14-046-GFM-DEX”
$filter4 [1] “MG-16-CER-RNA” “MG-08-CER-RNA” “13-095-GTS-unstim” [4] “MG-15-SVZ-RNA” “MG-24-MGF-RNA” “14-046-GTS-unstim” [7] “14-046-GFM-IL4” “16-117-THA-LPS” “16-074-GFM-unstim” [10] “16-003-SVZ-TNFa”
$filter5 [1] “MG-24-HIP-RNA” “MG-12-CER-RNA” “15-089-GTS-unstim” [4] “MG-15-SVZ-RNA” “MG-08-CER-RNA”
# Flatten every filter into one character vector (a sample flagged by several
# filters appears several times), then show the distinct sample names.
samples2remove <- unlist(list_filters, use.names = TRUE)
unique(samples2remove)
[1] “18-064-GFM-IFNy” “18-064-GFM-unstim” “18-105-GTS-LPS”
[4] “MG-15-SN-RNA” “16-117-SVZ-unstim” “17-003-GTS-LPS”
[7] “15-055-GFM-DEX” “15-089-GTS-unstim” “15-089-THA-LPS”
[10] “15-089-THA-unstim” “17-148-GTS-unstim” “13-095-CC-unstim” [13] “14-005-GTS-LPS” “14-046-GFM-unstim” “14-069-GTS-LPS”
[16] “14-074-GFM-LPS” “14-078-GFM-DEX” “14-078-GFM-LPS”
[19] “15-018-GFM-ATP” “15-021-THA-unstim” “14-063-GTS-unstim” [22] “18-079-GFM-LPS” “MG-08-CER-RNA” “MG-13-CER-RNA”
[25] “MG-16-CER-RNA” “MG-24-HIP-RNA” “MG-24-MGF-RNA”
[28] “14-046-GFM-IL4” “14-046-GFM-LPS” “14-046-GFM-TNFa”
[31] “14-046-GTS-unstim” “14-046-GFM-DEX” “13-095-GTS-unstim” [34] “MG-15-SVZ-RNA” “16-117-THA-LPS” “16-074-GFM-unstim” [37] “16-003-SVZ-TNFa” “MG-12-CER-RNA”
# UpSet plot of the overlaps between the current filters, largest
# intersections first.
upset(
  fromList(list_filters),
  order.by = "freq"
)

# Second filter set: the 5 million read cut-off replaces the 1 million one and
# the manual outlier list is left out.
Degraded_RNA <- c(
  "MG-24-HIP-RNA", "MG-12-CER-RNA", "15-089-GTS-unstim", "MG-15-SVZ-RNA",
  "MG-08-CER-RNA"
)
list_filters <- list(
  filter1 = star_alignment_5m,
  filter2 = mrna_mapping_5,
  filter3 = ribosomal_bases_20,
  filter4 = Degraded_RNA
)
list_filters
$filter1 [1] “18-021-GFM-DEX” “18-021-GFM-LPS” “18-064-GFM-ATP”
[4] “18-064-GFM-IFNy” “18-064-GFM-unstim” “18-105-GTS-LPS”
[7] “18-105-GTS-unstim” “18-118-GFM-IL4” “MG-15-SN-RNA”
[10] “16-117-SVZ-unstim” “17-003-GTS-LPS” “17-017-GFM-unstim” [13] “17-032-SVZ-IFNy” “13-087-CC-LPS” “14-075-GFM-IFNy”
[16] “14-075-GFM-LPS” “14-075-GFM-TNFa” “15-055-GFM-DEX”
[19] “15-089-GTS-unstim” “15-089-THA-LPS” “15-089-THA-unstim”
$filter2 [1] “17-148-GTS-unstim” “13-095-CC-unstim” “14-005-GTS-LPS”
[4] “14-046-GFM-unstim” “14-069-GTS-LPS” “14-074-GFM-LPS”
[7] “14-078-GFM-DEX” “14-078-GFM-LPS” “15-018-GFM-ATP”
[10] “15-021-THA-unstim” “14-063-GTS-unstim”
$filter3 [1] “18-079-GFM-LPS” “MG-08-CER-RNA” “MG-13-CER-RNA”
[4] “MG-16-CER-RNA” “MG-24-HIP-RNA” “MG-24-MGF-RNA”
[7] “14-046-GFM-IL4” “14-046-GFM-LPS” “14-046-GFM-TNFa”
[10] “14-046-GTS-unstim” “14-046-GFM-DEX”
$filter4 [1] “MG-24-HIP-RNA” “MG-12-CER-RNA” “15-089-GTS-unstim” [4] “MG-15-SVZ-RNA” “MG-08-CER-RNA”
# Flatten every filter into one character vector (a sample flagged by several
# filters appears several times), then show the distinct sample names.
samples2remove <- unlist(list_filters, use.names = TRUE)
unique(samples2remove)
[1] “18-021-GFM-DEX” “18-021-GFM-LPS” “18-064-GFM-ATP”
[4] “18-064-GFM-IFNy” “18-064-GFM-unstim” “18-105-GTS-LPS”
[7] “18-105-GTS-unstim” “18-118-GFM-IL4” “MG-15-SN-RNA”
[10] “16-117-SVZ-unstim” “17-003-GTS-LPS” “17-017-GFM-unstim” [13] “17-032-SVZ-IFNy” “13-087-CC-LPS” “14-075-GFM-IFNy”
[16] “14-075-GFM-LPS” “14-075-GFM-TNFa” “15-055-GFM-DEX”
[19] “15-089-GTS-unstim” “15-089-THA-LPS” “15-089-THA-unstim” [22] “17-148-GTS-unstim” “13-095-CC-unstim” “14-005-GTS-LPS”
[25] “14-046-GFM-unstim” “14-069-GTS-LPS” “14-074-GFM-LPS”
[28] “14-078-GFM-DEX” “14-078-GFM-LPS” “15-018-GFM-ATP”
[31] “15-021-THA-unstim” “14-063-GTS-unstim” “18-079-GFM-LPS”
[34] “MG-08-CER-RNA” “MG-13-CER-RNA” “MG-16-CER-RNA”
[37] “MG-24-HIP-RNA” “MG-24-MGF-RNA” “14-046-GFM-IL4”
[40] “14-046-GFM-LPS” “14-046-GFM-TNFa” “14-046-GTS-unstim” [43] “14-046-GFM-DEX” “MG-12-CER-RNA” “MG-15-SVZ-RNA”
# UpSet plot of the overlaps between the current filters, largest
# intersections first.
upset(
  fromList(list_filters),
  order.by = "freq"
)

# Third filter set, built from the same working directory as before.
setwd("~/Documents/MiGASti/Databases")
Degraded_RNA <- c(
  "MG-24-HIP-RNA", "MG-12-CER-RNA", "15-089-GTS-unstim", "MG-15-SVZ-RNA",
  "MG-08-CER-RNA"
)
# NOTE(review): "15-024-GFM_unstim" uses an underscore where every other
# sample ID in this file uses a hyphen -- confirm it matches a real sample
# name and is not a typo for "15-024-GFM-unstim".
duplicates <- c("15-024-GFM_unstim", "16-074-GFM-unstim")
outliers <- c("13-095-GTS-unstim")

# Back to the 1 million read cut-off, plus the degraded, duplicate and
# outlier lists defined just above.
list_filters <- list(
  filter1 = star_alignment_1m,
  filter2 = mrna_mapping_5,
  filter3 = ribosomal_bases_20,
  filter4 = Degraded_RNA,
  filter5 = duplicates,
  filter6 = outliers
)
list_filters
$filter1 [1] “18-064-GFM-IFNy” “18-064-GFM-unstim” “18-105-GTS-LPS”
[4] “MG-15-SN-RNA” “16-117-SVZ-unstim” “17-003-GTS-LPS”
[7] “15-055-GFM-DEX” “15-089-GTS-unstim” “15-089-THA-LPS”
[10] “15-089-THA-unstim”
$filter2 [1] “17-148-GTS-unstim” “13-095-CC-unstim” “14-005-GTS-LPS”
[4] “14-046-GFM-unstim” “14-069-GTS-LPS” “14-074-GFM-LPS”
[7] “14-078-GFM-DEX” “14-078-GFM-LPS” “15-018-GFM-ATP”
[10] “15-021-THA-unstim” “14-063-GTS-unstim”
$filter3 [1] “18-079-GFM-LPS” “MG-08-CER-RNA” “MG-13-CER-RNA”
[4] “MG-16-CER-RNA” “MG-24-HIP-RNA” “MG-24-MGF-RNA”
[7] “14-046-GFM-IL4” “14-046-GFM-LPS” “14-046-GFM-TNFa”
[10] “14-046-GTS-unstim” “14-046-GFM-DEX”
$filter4 [1] “MG-24-HIP-RNA” “MG-12-CER-RNA” “15-089-GTS-unstim” [4] “MG-15-SVZ-RNA” “MG-08-CER-RNA”
$filter5 [1] “15-024-GFM_unstim” “16-074-GFM-unstim”
$filter6 [1] “13-095-GTS-unstim”
# Flatten every filter into one named character vector; element names combine
# the filter name with the position inside that filter.
samples2remove <- unlist(list_filters, use.names = TRUE)
samples2remove
filter11 filter12 filter13 filter14
“18-064-GFM-IFNy” “18-064-GFM-unstim” “18-105-GTS-LPS” “MG-15-SN-RNA” filter15 filter16 filter17 filter18 “16-117-SVZ-unstim” “17-003-GTS-LPS” “15-055-GFM-DEX” “15-089-GTS-unstim” filter19 filter110 filter21 filter22 “15-089-THA-LPS” “15-089-THA-unstim” “17-148-GTS-unstim” “13-095-CC-unstim” filter23 filter24 filter25 filter26 “14-005-GTS-LPS” “14-046-GFM-unstim” “14-069-GTS-LPS” “14-074-GFM-LPS” filter27 filter28 filter29 filter210 “14-078-GFM-DEX” “14-078-GFM-LPS” “15-018-GFM-ATP” “15-021-THA-unstim” filter211 filter31 filter32 filter33 “14-063-GTS-unstim” “18-079-GFM-LPS” “MG-08-CER-RNA” “MG-13-CER-RNA” filter34 filter35 filter36 filter37 “MG-16-CER-RNA” “MG-24-HIP-RNA” “MG-24-MGF-RNA” “14-046-GFM-IL4” filter38 filter39 filter310 filter311 “14-046-GFM-LPS” “14-046-GFM-TNFa” “14-046-GTS-unstim” “14-046-GFM-DEX” filter41 filter42 filter43 filter44 “MG-24-HIP-RNA” “MG-12-CER-RNA” “15-089-GTS-unstim” “MG-15-SVZ-RNA” filter45 filter51 filter52 filter6 “MG-08-CER-RNA” “15-024-GFM_unstim” “16-074-GFM-unstim” “13-095-GTS-unstim”
# Distinct sample names across all filters (the result is printed, not stored).
unique(samples2remove)
[1] “18-064-GFM-IFNy” “18-064-GFM-unstim” “18-105-GTS-LPS”
[4] “MG-15-SN-RNA” “16-117-SVZ-unstim” “17-003-GTS-LPS”
[7] “15-055-GFM-DEX” “15-089-GTS-unstim” “15-089-THA-LPS”
[10] “15-089-THA-unstim” “17-148-GTS-unstim” “13-095-CC-unstim” [13] “14-005-GTS-LPS” “14-046-GFM-unstim” “14-069-GTS-LPS”
[16] “14-074-GFM-LPS” “14-078-GFM-DEX” “14-078-GFM-LPS”
[19] “15-018-GFM-ATP” “15-021-THA-unstim” “14-063-GTS-unstim” [22] “18-079-GFM-LPS” “MG-08-CER-RNA” “MG-13-CER-RNA”
[25] “MG-16-CER-RNA” “MG-24-HIP-RNA” “MG-24-MGF-RNA”
[28] “14-046-GFM-IL4” “14-046-GFM-LPS” “14-046-GFM-TNFa”
[31] “14-046-GTS-unstim” “14-046-GFM-DEX” “MG-12-CER-RNA”
[34] “MG-15-SVZ-RNA” “15-024-GFM_unstim” “16-074-GFM-unstim” [37] “13-095-GTS-unstim”
# Save the removal list in the working directory.
# NOTE(review): this writes the full named vector, so samples flagged by more
# than one filter are written more than once; the unique() result above is
# not what gets saved -- confirm that downstream code expects this.
write.table(samples2remove, file = "samples2remove.txt")

# UpSet plot of the overlaps between the final filters, largest intersections
# first.
# NOTE(review): the identical call is issued twice -- confirm the second one
# is needed.
upset(
  fromList(list_filters),
  order.by = "freq"
)
upset(
  fromList(list_filters),
  order.by = "freq"
)
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.